\ Portable, Stack Based String Library for ANS Forth. Version 1.0
\ Mark Wills February 2014. Public Domain.

\ string format:
\   String constants (held in STRING types):
\   max_len  actual_len  <string_data>  <?>
\      |           |          |          |
\    cell        cell       chars     padding (if required)

\   Transient strings (held on the string stack):
\   actual_len  <string_data>  <?>
\      |           |            |
\    cell        chars       padding (if required)

\ Environmental dependancy declarations:
\ Word     | ANS Library | ANS Ref  | Dependencies 
\ ---------+-------------+----------+-------------------------------
\   -ROT   |     N/A     |   N/A    | :="
\   .R     | core ext    | 6.2.0210 | $.S
\   HERE   | core        | 6.1.1650 | SWAP$ +$ REV$ LTRIM$ REPLACE$
\   PARSE  | core ext    | 6.2.2008 | :=" $"
\   PICK   | core ext    | 6.2.2030 | FINDC$
\   WITHIN | core ext    | 6.2.2440 | UCASE$ LCASE$
\
\ Note: The word -ROT is not an ANS word. It can be defined in terms of ANS
\       words as follows: : -ROT ( a b c -- c b a ) ROT ROT ;

\ Throw codes used by this library:
\ Throw Code|Nature of Error
\ ----------+-----------------------------------------
\    9900   | String stack underflow
\    9901   | String too large to assign
\    9902   | String stack is empty
\    9903   | Need at least 2 strings on string stack
\    9904   | String too large for string constant
\    9905   | Illegal LEN value
\    9906   | Need at least 3 strings on string stack
\    9907   | String is not a legal number
\    9908   | Illegal start value

base @ \ save systems' current number base
decimal

\ Set up string stack. The stack grows towards lower memory addresses.
256 \ maximum string stack size in bytes.
    \ Adjust to your own needs. Choose a value that is a multiple of your 
    \ systems' cell size.
        
constant ($sSize)          \ store stack size
here ($sSize) allot        \ reserve space for string stack 
constant ($sEnd)           \ bottom of string stack

variable ($sp)             \ pointer to top of string stack
($sEnd) ($sSize) + ($sp) ! \ initialise it
variable ($depth)          \ count of items on the string stack

variable ($temp0)          \ reserved for internal use
variable ($temp1)          \ reserved for internal use
variable ($temp2)          \ reserved for internal use
variable ($temp3)          \ reserved for internal use

\ General Note:
\ Words surrounded by parenthesis are for low-level internal use by the string
\ library, and should not need to be called by higher-level application code.

: ($depth+) ( -- ) 
    \ Increments the string stack item count
    1 ($depth) +! ;
    
: ($sp@) ( -- addr ) \ "string stack pointer fetch"
    \ Returns address of current top of string stack
    ($sp) @ ;

: ($rUp) ( n -- n|n+1)
    \ Rounds n up to the next even value
    1+ -2 and ;

: (sizeOf$) ( $addr - $size)
    \ Given an address of a transient string, compute the stack  size in bytes
    \ required to hold it, rounded up to the nearest cell size, and including
    \ the length cell.
    @ ($rUp) cell+ ;

: (set$SP) ( $size -- )
    \ Given the stack size of a transient string set the string stack pointer
    \ to the new address required to accomodate it.
    negate dup ($sp@) + ($sEnd) < if 9900 throw then 
    ($sp) +! ;

: (addrOf$) ( index -- addr )
    \ Given an index into the string stack, return the start address of the 
    \ string. addr points to the length cell. Topmost string is index 0,
    \ next string is index 1 and so on.
    ($sp@) swap dup if 0 do dup (sizeOf$) + loop else drop then ;
    
: (lenOf$) ( $addr -- len )
    \ Given the address of a transient string on the string stack (the address
    \ of the length cell), return the length of the string.
    \ Note: Immediate, compiling word for performance reasons.
    \       Modern compilers will inline this.
    state @ if postpone @ else @ then ; immediate

: depth$ ( -- $sDepth)
    \ Returns the depth of the string stack.
    ($depth) @ ;

: $const ( max_len tib:"name" -- ) ( runtime: -- $Caddr) \ "string constant"
    \ Creates a string constant. When "name" is referenced the address of the
    \ max_len field is pushed to the stack.
    \ e.g. 100 string msg
    \ The above creates a string called msg with capacity for 100 characters.
    create  dup ( max_len) , ( actual_len) 0 ,  allot align ;
    
: clen$ ( $Caddr -- len ) \ "string constant length"
    \ Given the address of a string constant, returns its length.
    cell+ @ ;
    
: maxLen$ ( $Caddr -- max_len ) \ "maximum length of string"
    \ Given the address of a string constant, returns its maximum length.
    \ Dependencies: (lenOf$)
    (lenOf$) ;

: .$const ( $Caddr -- ) \ "display string constant"
    \ Displays the string constant. e.g. fred .$const
    \ Dependencies: (lenOf$)
    cell+ dup (lenOf$) swap cell+ swap type ;
    
: :=" ( $Caddr tib:"string" -- ) \ "assign string constant"
    \ Assigns the string "string" to the string constant.
    \ e.g. msg :=" hello mother!"
    \ Dependencies: PARSE (core ext, 6.2.2008)
    dup @ [char] " parse swap >r
    2dup < if 9901 throw then
    nip 2dup swap cell+ !
    >r [ 2 cells ] literal + r> r> -rot cmove ;

: ($") ( addr len -- ) ( ss: -- str )
    \ Run-time action for $" (see below).
    \ Dependencies: ($rUp) ($set$SP) ($sp) ($depth+)
    dup ($rUp) cell+ (set$SP)
    dup ($sp@) !  ($sp@) cell+ swap cmove  ($depth+) ;

: $" ( tib:"string" -- ) ( ss: -- str) \ "string to string stack"
    \ Pushes a string directly to the string stack.
    \ e.g. $" hello world" .$
    \ Dependencies: ($") PARSE (core ext, 6.2.2008)
    \ Note: State smart word. Runtime behaviour is in ($")
    state @ if
        postpone s"  postpone ($")
    else
        [char] " parse  ($")
    then ; immediate 

: >$ ( $Caddr -- ) ( ss: -- str) \ "to string stack"
    \ Moves a string constant to the string stack
    \ e.g. msg >$
    \ Dependencies: (lenOf$) ($")
    cell+ dup (lenOf$) swap cell+ swap ($") ;

: pick$ ( n -- ) ( ss: -- strN) \ "pick string"
    \ Given an index into the string stack, copy the indexed string to the top
    \ of the string stack.
    \ 0 $pick is equivalent to $DUP
    \ 1 $pick is equivalent to $OVER etc.
    \ Dependencies: (lenOf$) depth$ ($addrOf$) ($")
    depth$ 0= if 9902 throw then 
    (addrOf$) dup (lenOf$) swap cell+ swap ($") ;

: dup$ ( -- ) ( ss: s1 -- s1 s1) \ "duplicate string"
    \ Duplicates a string on the string stack.
    \ Dependencies: depth$ pick$
    depth$ 0= if 9902 throw then 
    0 pick$ ;

: drop$ ( -- ) ( ss: str -- ) \ "drop string"
    \ Drops the top string from the string stack.
    \ Dependencies: depth$ (sizeOf$) (set$SP)
    depth$ 0= if 9900 throw then
    ($sp@) (sizeOf$) negate (set$SP)   -1 ($depth) +! ;
    
: swap$ ( -- ) ( ss: s1 s2 -- s2 s1) \ "swap string"
    \ Swaps the top two string items on the string stack.
    \ Dependencies: depth$ (sizeOf$) (addrOf$) HERE (core 6.1.1650)
    depth$ 2 < if 9903 throw then 
    ($sp@) dup (sizeOf$) here swap cmove
    1 (addrOf$) dup (sizeOf$) ($sp@) swap cmove
    here dup (sizeOf$)  ($sp@) dup (sizeOf$) + swap cmove ;

: nip$ ( -- ) ( ss: s1 s2 -- s2) \ "nip string"
    \ Remove the string under the top string.
    \ Dependencies: swap$ drop$ depth$
    depth$ 2 < if 9903 throw then 
    swap$ drop$ ;
    
: over$ ( -- ) ( ss: s1 s2 -- s1 s2 s1) \ "over string"
    \ Move a copy of s1 to top of string stack.
    \ Dependencies: pick$ depth$
    depth$ 2 < if 9903 throw then
    1 pick$ ;
    
: (rot$) ( -- ) ( ss: s6 s5 s4 s3 s2 s1 -- s3 s2 s1)
    \ move the top three strings downwards by three strings
    \ internal factor of both ROT$ and -ROT$
    \ Dependencies: ($sp@) (sizeOf$) (addrOf$) ($sp) ($depth)
    ($sp@) (sizeOf$)  1 (addrOf$) (sizeOf$)  2 (addrOf$) (sizeOf$) + +  cmove
    3 (addrOf$) ($sp) !  -3 ($depth) +! ;

: rot$ ( -- ) ( ss: s3 s2 s1 -- s2 s1 s3) \ "rotate strings"
    \ Rotates the top three string to the left.
    \ The third string moves to the top of the string stack.
    \ Note: For ease of implementation, this routine copies (using PICK$)
    \ the strings to the top of the string stack in their correct final
    \ order, then removes the 3 original strings underneath.
    \ Consequently, it is possible to run out of string stack space.
    \ If this happens, the condition will be correctly caught in (set$SP).
    \ Dependencies: pick$ (rot$) depth$
    depth$ 3 < if 9906 throw then
    1 pick$  1 pick$  4 pick$ (rot$) ;

: -rot$ ( -- ) ( ss: s3 s2 s1 -- s1 s3 s2) \ "rotate strings"
    \ Rotates the top three string to the right.
    \ The top string moves to the third position.
    \ Note: For ease of implementation, this routine copies (using PICK$)
    \ the strings to the top of the string stack in their correct final
    \ order, then removes the 3 original strings underneath.
    \ Consequently, it is possible to run out of string stack space.
    \ If this happens, the condition will be correctly caught in (set$SP).
    \ Dependencies: pick$ (rot$) depth$
    depth$ 3 < if 9906 throw then
    0 pick$  3 pick$  3 pick$ (rot$) ;
    
: len$ ( -- len ) ( ss: -- ) \ "length of string"
    \ Returns the length of the topmost string.
    \ Dependencies: none
    depth$ 1 < if 9902 throw then 
    ($sp@) @ ;    

: >$const ( $Caddr -- ) ( ss: str -- ) \ "to string constant"
    \ Move top of string stack to the string constant.
    \ e.g. $" blue" fred >$const  fred .$const 
    \ displays "blue"    
    \ Dependencies: depth$ (sizeOf$) drop$
    >r  depth$ 1 < if 9902 throw then
    len$ r@ @ > if 9904 throw then
    ($sp@) dup (sizeOf$) r> cell+ swap cmove drop$ ;

: +$ ( -- ) ( ss: s1 s2 -- s2+s1) \ concatenate strings
    \ Replaces the top most two strings on the string stack with their
    \ concatenated equivalent.
    \ eg: $" red" $" blue" +$ .$
    \ displays "redblue"
    \ Dependencies: depth$ (addrOf$) (lenOf$) len$ drop$ HERE (core 6.1.1650)
    depth$ 2 < if 9903 throw then 
    1 (addrof$) cell+  here   1 (addrof$) (lenof$)  cmove
    ($sp@) cell+   1 (addrof$) (lenof$) here +  len$ cmove
    here len$ 1 (addrof$) (lenof$) +  drop$ drop$  ($") ;    

: mid$ ( start len -- ) ( ss: str1 -- str1 str2) \ "mid-string"
    \ The characters from start to start+len are pushed to the string stack
    \ as a new string. The original string is retained.
    \ Dependencies: len$ ($")
    depth$ 1 < if 9902 throw then 
    dup len$ >  over 1 < or  if 9905 throw then
    over dup len$ >  swap 0< or if 9908 throw then 
    swap ($sp@) cell+ +  swap  ($") ;

: left$ ( len -- ) ( ss: str1 -- str1 str2) \ "left of string"
    \ The leftmost len characters are pushed to  the string stack as a new 
    \ string. The original string is retained.
    \ Dependencies: mid$
    depth$ 1 < if 9902 throw then 
    dup len$ > over 1 < or if 9905 throw then 
    0 ($sp@) cell+ +  swap  ($") ;
   
: right$ ( len -- ) ( ss: str1 -- str1 str2) \ "right of string"
    \ The rightmost len characters, pushed to the string stack as a new string.
    \ the original string is retained.
    \ Dependencies: (lenOf$) mid$ 
    depth$ 1 < if 9902 throw then 
    dup len$ > over 1 < or if 9905 throw then 
    ($sp@) (lenOf$) over - ($sp@) cell+ +  swap  ($") ;

: findc$ ( char -- pos|-1 ) ( ss: -- ) \ "find character in string"
    \ Returns the first occurance of the character char in  the top string.
    \ The string is retained. Returns -1 if the char is not found.
    \ Dependencies: PICK (ANS core ext) depth$
    depth$ 1 < if 9902 throw then 
    ($sp@) cell+  ($sp@) (lenOf$) 0 do
        dup c@ 2 pick = if i -1 leave then 1+ loop
     -1 = if nip nip else drop -1 then ;

: find$ ( offset -- pos|-1 ) ( ss: s1 s2 -- s1) \ "find string"
    \ Searches string s1, beginning at offset, for the substring s2.
    \ If the string is found, returns the position of the string relative
    \ to the offset, otherwise returns -1.
    \ Dependencies: depth$ len$ (addrOf$) (lenOf$) drop$
    depth$ 2 < if 9903 throw then 
    len$ ($temp1) !    1 (addrOf$) (lenOf$) ($temp0) !
    dup ($temp0) @ > if drop -1 exit then 
    1 (addrOf$) cell+ + ($temp2) !    ($sp@) cell+ ($temp3) !
    ($temp1) @ ($temp0) @ > if drop -1 exit then 
    0  ($temp0) @ 0 do
        ($temp3) @ over + c@ 
        ($temp2) @ i + c@ = if
            1+ dup ($temp1) @ = if 
                drop i ($temp1) @ - 1+   -2 leave then 
        else drop 0 then
    loop 
    dup -2 = if drop else drop -1 then drop$ ;

: .$ ( -- ) ( ss: str -- ) \ "display string"
    \ Pop and display the topmost string from string stack.
    \ Dependencies: depth$ (lenOf$) drop$
    depth$ 0= if 9902 throw then 
    ($sp@) cell+ ($sp@) (lenOf$) type  drop$ ;
    
: rev$ ( -- ) ( ss: s1 -- s2 ) \ "reverse string"
    \ Reverse topmost string on string stack.
    \ Dependencies: depth$ (lenOf$) HERE (core 6.1.1650)
    depth$ 0= if 9902 throw then 
    ($sp@) dup cell+ >r  (lenOf$)  r> swap here swap cmove 
    ($sp@) (lenOf$) here 1- +
    ($sp@) cell+  dup ($sp@) (lenOf$) +   swap do
        dup c@ i c!  1- loop  drop ;

: ltrim$ ( -- ) ( ss: s1 -- s2 ) \ "left trim string"
    \ Removes leading spaces from s1, resulting in s2.
    \ Dependencies: depth$ (lenOf$) (sizeOf$) drop$ HERE (core 6.1.1650)
    depth$ 0= if 9902 throw then  
    ($sp@) dup (lenOf$) >r  here over (sizeOf$)  cmove
    0  r> here cell+ dup >r +  r> do
        i c@ bl = if 1+ else leave then loop 
    dup 0> if 
        >r  ($sp@) (lenOf$)  drop$
        here cell+ r@ +  swap r> -  ($")
    else drop then ;

: rtrim$ ( -- ) ( ss: s1 -- s2 ) \ "right trim string"
    \ Removes trailing spaces from s1, resulting in s2.
    \ Dependencies: depth$ rev$ ltrim$
    depth$ 0= if 9902 throw then
    rev$ ltrim$ rev$ ;

: $trim ( -- ) ( ss: s1 -- s2 ) \ "trim string"
    \ Remove both leading and trailing spaces from s1, resulting in s2.
    \ Dependencies: rtrim$ ltrim$
    rtrim$ ltrim$ ;

: replace$ ( -- pos ) ( found: ss: s1 s2 s3 -- s4  not found: s1 s2 -- s1 s2)
    \ In string s2 find s3 and replace with s1, resulting in s4. 
    \ If a replacement is made, the starting position of the replacement is 
    \ returned, otherwise -1 is returned.
    \ Dependencies: depth$ find$ (addrOf$) (lenOf$) drop$ ($")
    \               nip$ HERE (core 6.1.1650)
    depth$ 3 < if 9906 throw then
    len$ >r
    0 find$ dup ($temp0) ! -1 > if
        ($sp@) cell+  here  ($temp0) @ cmove  
        1 (addrOf$) cell+   here ($temp0) @ +  
        1 (addrOf$) (lenof$) cmove
        ($sp@) cell+ ($temp0) @ + r@ +    
        here ($temp0) @ + 1 (addrOf$) (lenof$) +
        len$ r> - ($temp0) @ -  dup >r  cmove
        r> ($temp0) @ + 1 (addrOf$) (lenof$) +
        drop$ drop$ here swap ($")
    else r> drop ($temp0) @ then ;

: ucase$ ( -- ) ( ss: str -- STR) \ "convert to upper case"
    \ On the topmost string, converts all lower case characters to upper case.
    \ Dependencies: WITHIN (core ext) (lenOf$) depth$
    depth$ 1 < if 9902 throw then
    ($sp@) dup (lenOf$) + cell+  ($sp@) cell+  do
       i c@ dup [ char a ] literal  [ char { ] literal within if 
            32 -  i c! else drop then loop ;

: lcase$ ( -- ) ( ss: STR -- str) \ "convert to lower case"
    \ On the topmost string, converts all upper case characters to lower case.
    \ Dependencies: WITHIN (core ext) (lenOf$) depth$
    depth$ 1 < if 9902 throw then 
    ($sp@) dup (lenOf$) + cell+  ($sp@) cell+  do
       i c@ dup [ char A ] literal  [ char [ ] literal within if 
            32 +  i c! else drop then loop ;

: ==$? ( -- flag ) ( ss: -- ) \ "is equal to string"
    \ Performs a case-sensitive comparison of the topmost two strings on the 
    \ string stack, returning true if their length and contents are identical,
    \ otherwise returning false.
    \ Dependencies: depth$ (addrOf$) (lenOf$)
    depth$ 2 < if 9903 throw then 
    len$ 1 (addrOf$) (lenOf$) = if
        1 (addrOf$) cell+
        ($sp@) cell+ ($sp@) +  ($sp@) cell+ do
            dup c@  i c@  <> if drop false leave then 1+ loop
        dup if drop true then 
    else false then ;
   
: val$ ( -- ud ) ( ss: str -- )
    \ Interprets the topmost string as an integer number, returning its value
    \ on the data stack as an unsigned double integer (see 6.1.0570)
    \ Dependencies: (lenOf$) drop$
    0 0 ( ud1) ($sp@) dup (lenOf$) swap cell+ swap ( c-addr1 u1)
    >number if 9907 throw then
    drop  drop$ ;
    
: ud>$ ( ud -- ) ( ss: -- str )
    \ Pushes the unsigned double number on the data stack to the string stack.
    \ Dependencies: ($")
    <# #s #> ($") ;

: $.s ( -- ) ( ss: -- )
    \ Non-destructively displays the string stack.
    \ Dependencies: depth$ len$ .$ .R (core ext, 6.2.0210)
    cr  depth$ 0> if
        ($sp@)  depth$
        ." Index|Length|String" cr
        ." -----+------+------" cr 
        0 begin
            depth$ 0> while
                dup 5 .r ." |" len$ 6 .r  ." |" .$  1+ cr
        repeat  drop
        ($depth) !  ($sp) !  cr
    else
        ." String stack is empty." cr
    then
    ." Allocated stack space:" ($sEnd) ($sSize) + ($sp@) - 4 .r ."  bytes" cr
    ."     Total stack space:" ($sSize) 4 .r ."  bytes" cr
    ." Stack space remaining:" ($sp@) ($sEnd) - 4 .r ."  bytes" cr ;

base ! \ restore systems' current number base



: .s cr depth 0 > if
\ my .s - I like it horizontal, baby :-)
        1 depth 1- do i 1- pick u. -1 +loop ." <-- top"
    else
        cr ." stack is empty"
    then cr ;

$" blue"
$" green"
$" red"
